### K means functions
update_delta <- function(kappa, e_data, K){
  # the order is Y, id, time, treatment timing, weight, X
  N <- max(e_data[,2])
  T <- max(e_data[,3])
  p <- ncol(e_data) - 5
  
  # take type estimates as given
  e_data_k <- kappa[e_data[,2]]
  
  # update delta with sample means
  delta_updated <- matrix(0,K,T)
  for (k in 1:K){
    delta_updated[k,] <- sapply(1:T,
                                function(t) sum(e_data[e_data[,3]==t & e_data_k==k,1]*
                                                  e_data[e_data[,3]==t & e_data_k==k,5])/
                                  sum(e_data[e_data[,3]==t & e_data_k==k,5]))
  }
  rownames(delta_updated) <- 1:K
  colnames(delta_updated) <- 1:T
  
  # return updated delta with some summary statistics
  summary <- cbind(1:K, sapply(1:K, function(k) sum(kappa==k)) )
  colnames(summary) <- c("type", "Nk")
  ans <- list(delta=delta_updated,
              summary=summary)
  return(ans)
}

update_type <- function(delta, e_data, K){
  # the order is Y, id, time, treatment timing, weight, X
  N <- max(e_data[,2])
  T <- max(e_data[,3])
  p <- ncol(e_data) - 5
  
  # update type estimates
  kappa_updated <- rep(0,N)
  SSE_full <- rep(0,N)
  for (i in unique(e_data[,2])){
    time_index <- (e_data[,3])[e_data[,2]==i]
    SSE <- rep(0,K)
    
    for (k in 1:K){
      deltak <- delta[k,time_index]
      SSE[k] <- sum((e_data[e_data[,2]==i,1] - deltak)^2)
    }
    kappa_updated[i] <- which.min(SSE)
    SSE_full[i] <- min(SSE)
  }
  
  # return updated kappa estimates with minimized objective function
  ans <- list(kappa=kappa_updated, 
              MSE=sum(SSE_full)/(N*T))
  return(ans)
}

kmeans_fun <- function(kappa, e_data, K, eps, maxiter){
  # the order is Y, id, time, treatment timing, weight, X
  N <- max(e_data[,2])
  T <- max(e_data[,3])
  p <- ncol(e_data) - 5
  
  temp1 <- update_delta(kappa, e_data, K)
  temp2 <- update_type(temp1$delta, e_data, K)
  
  kappa_old <- kappa
  kappa_new <- temp2$kappa
  
  estimates_new <- temp1$delta
  estimates_old <- estimates_new - eps - 1
  
  noi <- 1
  MSE <- temp2$MSE
  
  while (max(abs(estimates_new-estimates_old)) > eps & 
         noi < maxiter & 
         sum(kappa_old!=kappa_new) > 0){
    
    if (length(unique(kappa_new))==K){
      
      estimates_old <- estimates_new
      kappa_old <- kappa_new
      
      temp1 <- update_delta(kappa_old, e_data, K)
      temp2 <- update_type(temp1$delta, e_data, K)
      
      estimates_new <- temp1$delta
      kappa_new <- temp2$kappa
      
    } else {
      
      tempK <- length(unique(kappa_new))
      tempDelta <- estimates_new
      
      estimates_old <- estimates_new
      kappa_old <- sapply(kappa_new, function(x) which(unique(kappa_new)==x))
      
      temp1 <- update_delta(kappa_old, e_data, tempK)
      temp2 <- update_type(rbind(temp1$delta, 
                                 tempDelta[(1:K)[!(1:K %in% unique(kappa_new))],]), 
                           e_data, K)
      
      estimates_new <- rbind(temp1$delta, 
                             tempDelta[(1:K)[!(1:K %in% unique(kappa_new))],])
      kappa_new <- temp2$kappa
      
    }
    
    MSE <- c(MSE, temp2$MSE)
    noi <- noi + 1
    
  }
  
  ans <- list(delta=temp1$delta, 
              kappa=temp2$kappa, MSE=MSE, noi=noi, summary=temp1$summary)
  return(ans)
}

update_delta_spline <- function(kappa, e_data, K){
  # the order is Y, id, time, treatment timing, weight, X
  N <- max(e_data[,2])
  T <- max(e_data[,3])
  p <- ncol(e_data) - 5
  
  # take type estimates as given
  e_data_k <- kappa[e_data[,2]]
  
  # update delta with constant function
  X <- matrix(0,nrow(e_data),K)
  for (i in 1:nrow(X)){
    X[i,] <- 1:K==e_data_k[i]
  }
  delta_updated <- t(matrix(solve(t(X)%*%diag(e_data[,5])%*%X)%*%t(X)%*%diag(e_data[,5])%*%e_data[,1],
                            1,K))
  rownames(delta_updated) <- 1:K
  colnames(delta_updated) <- c("intercept")
  
  # return updated delta with some summary statistics
  summary <- cbind(1:K, sapply(1:K, function(k) sum(kappa==k)) )
  colnames(summary) <- c("type", "Nk")
  ans <- list(delta=delta_updated,
              summary=summary)
  return(ans)
}

update_type_spline <- function(delta, e_data, K){
  # the order is Y, id, time, treatment timing, weight, X
  N <- max(e_data[,2])
  T <- max(e_data[,3])
  p <- ncol(e_data) - 5
  
  # update type estimates
  kappa_updated <- rep(0,N)
  SSE_full <- rep(0,N)
  for (i in unique(e_data[,2])){
    time_index <- (e_data[,3])[e_data[,2]==i]
    SSE <- rep(0,K)
    
    for (k in 1:K){
      deltak <- rep(1,length(time_index))*delta[k,]
      SSE[k] <- sum((e_data[e_data[,2]==i,1] - deltak)^2)
    }
    kappa_updated[i] <- which.min(SSE)
    SSE_full[i] <- min(SSE)
  }
  
  # return updated kappa estimates with minimized objective function
  ans <- list(kappa=kappa_updated, 
              MSE=sum(SSE_full)/(N*T))
  return(ans)
}

kmeans_fun_spline <- function(kappa, e_data, K, eps, maxiter){
  # the order is Y, id, time, treatment timing, weight, X
  N <- max(e_data[,2])
  T <- max(e_data[,3])
  p <- ncol(e_data) - 5
  
  temp1 <- update_delta_spline(kappa, e_data, K)
  temp2 <- update_type_spline(temp1$delta, e_data, K)
  
  kappa_old <- kappa
  kappa_new <- temp2$kappa
  
  estimates_new <- temp1$delta
  estimates_old <- estimates_new - eps - 1
  
  noi <- 1
  MSE <- temp2$MSE
  
  while (max(abs(estimates_new-estimates_old)) > eps & 
         noi < maxiter & 
         sum(kappa_old!=kappa_new) > 0){
    
    if (length(unique(kappa_new))==K){
      
      estimates_old <- estimates_new
      kappa_old <- kappa_new
      
      temp1 <- update_delta_spline(kappa_old, e_data, K)
      temp2 <- update_type_spline(temp1$delta, e_data, K)
      
      estimates_new <- temp1$delta
      kappa_new <- temp2$kappa
      
    } else {
      
      tempK <- length(unique(kappa_new))
      tempDelta <- estimates_new
      
      estimates_old <- estimates_new
      kappa_old <- sapply(kappa_new, function(x) which(unique(kappa_new)==x))
      
      temp1 <- update_delta_spline(kappa_old, e_data, tempK)
      temp2 <- update_type_spline(matrix(rbind(temp1$delta, 
                                               as.matrix(tempDelta[(1:K)[!(1:K %in% unique(kappa_new))],])),
                                         K),
                                  e_data, K)
      
      estimates_new <- matrix(rbind(temp1$delta, 
                                    as.matrix(tempDelta[(1:K)[!(1:K %in% unique(kappa_new))],])),
                              K)
      kappa_new <- temp2$kappa
      
    }
    
    MSE <- c(MSE, temp2$MSE)
    noi <- noi + 1
    
  }
  
  ans <- list(delta=temp1$delta, 
              kappa=temp2$kappa, MSE=MSE, noi=noi, summary=temp1$summary)
  return(ans)
}

simulationK2 <- function(T,N,B,sd,rho){
  data <- matrix(0,(T+1)*N,5)
  data[,2] <- kronecker(1:N,rep(1,T+1))
  data[,3] <- 1:(T+1)
  data[,4] <- kronecker(ceiling(runif(N,0,2)),rep(1,T+1))
  data[,5] <- 1
  
  # serially correlated errors
  rhoMat <- sapply(1:(T+1),
                   function(t) rho^((t-1):(t-T-1)))
  rhoMat <- t(rhoMat)
  rhoMat[rhoMat > 1] <- 0
  epsilon <- rnorm(N*(T+1),0,sd*(1-rho^2)^(0.5))
  epsilon[(0:(N-1))*(T+1)+1] <- rnorm(N,0,sd)
  U <- c(sapply(1:N,
                function(i) rhoMat%*%epsilon[(i-1)*(T+1)+1:(T+1)]))
  
  # unit fixed-effects
  type <- apply(matrix(data[,4],T+1,N),2,mean)
  unitFE <- rnorm(N,37,17)*(type==1) + rnorm(N,39,17)*(type==2)
  
  # treatment status
  treatment <- (runif(N,0,1)<1/3)*(type==1) + (runif(N,0,1)<2/3)*(type==2)
  
  # generate outcome
  data[,1] <- kronecker(unitFE,rep(1,T+1)) +
    (1.66*(data[,3]-T))*(data[,4]==1) +
    4*kronecker(treatment,rep(1,T+1))*(data[,4]==1)*(data[,3]==T+1) +
    1*kronecker(treatment,rep(1,T+1))*(data[,4]==2)*(data[,3]==T+1) +
    U
  
  # Kmeans
  SubData <- data
  SubData[,1] <- data[,1] - c(0,data[1:(nrow(data)-1),1])
  SubData <- SubData[SubData[,3]<=T & SubData[,3]>1,]
  SubData[,3] <- SubData[,3]-1
  result <- matrix(0,N+2,B)
  
  for (t in 1:B){
    init <- ceiling(runif(N,0,2))
    kmeans <- kmeans_fun(init,as.matrix(SubData),2,0,200)
    kmeans_spline <- kmeans_fun_spline(init,as.matrix(SubData),2,0,200)
    result[,t] <- c(init,kmeans$MSE[length(kmeans$MSE)],kmeans_spline$MSE[length(kmeans_spline$MSE)])
  }
  
  init <- result[1:N,which.min(result[N+1,])]
  kmeans <- kmeans_fun(init,as.matrix(SubData),2,0,200)
  init <- result[1:N,which.min(result[N+2,])]
  kmeans_spline <- kmeans_fun_spline(init,as.matrix(SubData),2,0,200)
  
  TypeEstimation <- c(sum(kmeans$kappa==sapply(1:N,function(i) mean(data[data[,2]==i,4])))/N,
                 sum(kmeans$kappa==sapply(1:N,function(i) 3-mean(data[data[,2]==i,4])))/N)
  TypeEstimation_spline <- c(sum(kmeans_spline$kappa==sapply(1:N,function(i) mean(data[data[,2]==i,4])))/N,
                             sum(kmeans_spline$kappa==sapply(1:N,function(i) 3-mean(data[data[,2]==i,4])))/N)
  
  classification <- c(max(TypeEstimation), max(TypeEstimation_spline))
  
  # Type-specific diff-in-diff
  CATTkmeans <- c(mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                    mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==0,1]),
                  mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                    mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==0,1]))
  weight <- (c(sum(treatment==1 & kmeans$kappa==1), sum(treatment==1 & kmeans$kappa==2)))
  weight[is.nan(CATTkmeans)] <- 0
  ATTkmeans <- sum((CATTkmeans*weight)[is.nan(CATTkmeans)==0])/sum(weight)
  
  CATTkmeans_spline <- c(mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                    mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==0,1]),
                  mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                    mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==0,1]))
  weight <- (c(sum(treatment==1 & kmeans_spline$kappa==1), sum(treatment==1 & kmeans_spline$kappa==2)))
  weight[is.nan(CATTkmeans_spline)] <- 0
  ATTkmeans_spline <- sum((CATTkmeans_spline*weight)[is.nan(CATTkmeans_spline)==0])/sum(weight)
  
  # Diff-in-diff
  ATTdid <- mean(data[data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
    mean(data[data[,3]==T & treatment[data[,2]]==1,1]) - 
    mean(data[data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
    mean(data[data[,3]==T & treatment[data[,2]]==0,1])
  
  # synthetic control
  synthdata <- cbind(matrix(data[treatment[data[,2]]==0,1],T+1),
                     matrix(data[treatment[data[,2]]==1,1],T+1))
  synthdata <- t(synthdata)
  ATTsc <- sc_estimate(Y=synthdata,N0=sum(treatment==0),T0=T)[1]
  
  # synthetic diff-in-diff
  ATTsynthdid <- synthdid_estimate(Y=synthdata,N0=sum(treatment==0),T0=T)[1]
  
  return(c(ATTdid,ATTsc,ATTsynthdid,
           CATTkmeans[order(TypeEstimation,decreasing=TRUE)],ATTkmeans,
           CATTkmeans_spline[order(TypeEstimation,decreasing=TRUE)],ATTkmeans_spline,
           classification))
}

simulationK3 <- function(T,N,B,sd,rho){
  data <- matrix(0,(T+1)*N,5)
  data[,2] <- kronecker(1:N,rep(1,T+1))
  data[,3] <- 1:(T+1)
  data[,4] <- kronecker(sapply(runif(N,0,1),
                               function(v) 1+(v>0.4)+(v>0.8) ),rep(1,T+1))
  data[,5] <- 1
  
  # serially correlated errors
  rhoMat <- sapply(1:(T+1),
                   function(t) rho^((t-1):(t-T-1)))
  rhoMat <- t(rhoMat)
  rhoMat[rhoMat > 1] <- 0
  epsilon <- rnorm(N*(T+1),0,sd*(1-rho^2)^(0.5))
  epsilon[(0:(N-1))*(T+1)+1] <- rnorm(N,0,sd)
  U <- c(sapply(1:N,
                function(i) rhoMat%*%epsilon[(i-1)*(T+1)+1:(T+1)]))
  
  # unit fixed-effects
  type <- apply(matrix(data[,4],T+1,N),2,mean)
  unitFE <- rnorm(N,37,17)*(type==1) + rnorm(N,39,17)*(type==2) + rnorm(N,35,17)*(type==3)
  
  # treatment status
  treatment <- (runif(N,0,1)<1/3)*(type==1) + (runif(N,0,1)<1/2)*(type==2) + (runif(N,0,1)<0.5)*(type==3)
  
  # generate outcome
  data[,1] <- kronecker(unitFE,rep(1,T+1)) +
    (2.74*(data[,3]-T))*(data[,4]==1) + (1.42*data[,3]-T)*(data[,4]==2) +
    5*kronecker(treatment,rep(1,T+1))*(data[,4]==1)*(data[,3]==T+1) +
    kronecker(treatment,rep(1,T+1))*(data[,4]==2)*(data[,3]==T+1) +
    U
  
  # Kmeans
  SubData <- data
  SubData[,1] <- data[,1] - c(0,data[1:(nrow(data)-1),1])
  SubData <- SubData[SubData[,3]<=T & SubData[,3]>1,]
  SubData[,3] <- SubData[,3]-1
  result <- matrix(0,N+2,B)
  
  for (t in 1:B){
    init <- ceiling(runif(N,0,3))
    while (length(unique(init))<3){
      init <- ceiling(runif(N,0,3))
    }
    kmeans <- kmeans_fun(init,as.matrix(SubData),3,0,200)
    kmeans_spline <- kmeans_fun_spline(init,as.matrix(SubData),3,0,200)
    result[,t] <- c(init,kmeans$MSE[length(kmeans$MSE)],kmeans_spline$MSE[length(kmeans_spline$MSE)])
  }
  
  init <- result[1:N,which.min(result[N+1,])]
  kmeans <- kmeans_fun(init,as.matrix(SubData),3,0,200)
  init <- result[1:N,which.min(result[N+2,])]
  kmeans_spline <- kmeans_fun_spline(init,as.matrix(SubData),3,0,200)
  
  map1 <- c(1,2,3)
  map2 <- c(1,3,2)
  map3 <- c(2,1,3)
  # type1=estimated type2, type2=estimated type3, ...
  map4 <- c(2,3,1)
  map5 <- c(3,1,2)
  map6 <- c(3,2,1)
  
  TypeEstimation <- c(sum(kmeans$kappa==map1[type]),sum(kmeans$kappa==map2[type]),sum(kmeans$kappa==map3[type]),
                      sum(kmeans$kappa==map4[type]),sum(kmeans$kappa==map5[type]),sum(kmeans$kappa==map6[type]))/N
  TypeEstimation_spline <- c(sum(kmeans_spline$kappa==map1[type]),sum(kmeans_spline$kappa==map2[type]),sum(kmeans_spline$kappa==map3[type]),
                             sum(kmeans_spline$kappa==map4[type]),sum(kmeans_spline$kappa==map5[type]),sum(kmeans_spline$kappa==map6[type]))/N

  classification <- c(max(TypeEstimation), max(TypeEstimation_spline))
  
  if (which.max(TypeEstimation)==1){
    map <- map1
  } else if (which.max(TypeEstimation)==2){
    map <- map2
  } else if (which.max(TypeEstimation)==3){
    map <- map3
  } else if (which.max(TypeEstimation)==4){
    map <- map4
  } else if (which.max(TypeEstimation)==5){
    map <- map5
  } else if (which.max(TypeEstimation)==6){
    map <- map6
  }
  
  if (which.max(TypeEstimation_spline)==1){
    map_spline <- map1
  } else if (which.max(TypeEstimation_spline)==2){
    map_spline <- map2
  } else if (which.max(TypeEstimation_spline)==3){
    map_spline <- map3
  } else if (which.max(TypeEstimation_spline)==4){
    map_spline <- map4
  } else if (which.max(TypeEstimation_spline)==5){
    map_spline <- map5
  } else if (which.max(TypeEstimation_spline)==6){
    map_spline <- map6
  }
  
  # Type-specific diff-in-diff
  CATTkmeans <- c(mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                    mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==0,1]),
                  mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                    mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==0,1]),
                  mean(data[kmeans$kappa[data[,2]]==3 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==3 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==3 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                    mean(data[kmeans$kappa[data[,2]]==3 & data[,3]==T & treatment[data[,2]]==0,1]) )
  weight <- c(sum(treatment==1 & kmeans$kappa==1), sum(treatment==1 & kmeans$kappa==2), sum(treatment==1 & kmeans$kappa==3))
  weight[is.nan(CATTkmeans)] <- 0
  ATTkmeans <- sum((CATTkmeans*weight)[is.nan(CATTkmeans)==0])/sum(weight)
  
  CATTkmeans_spline <- c(mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                           mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                           mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                           mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==0,1]),
                         mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                           mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                           mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                           mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==0,1]),
                         mean(data[kmeans_spline$kappa[data[,2]]==3 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                           mean(data[kmeans_spline$kappa[data[,2]]==3 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                           mean(data[kmeans_spline$kappa[data[,2]]==3 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                           mean(data[kmeans_spline$kappa[data[,2]]==3 & data[,3]==T & treatment[data[,2]]==0,1]))
  weight <- c(sum(treatment==1 & kmeans_spline$kappa==1), sum(treatment==1 & kmeans_spline$kappa==2), sum(treatment==1 & kmeans_spline$kappa==3))
  weight[is.nan(CATTkmeans_spline)] <- 0
  ATTkmeans_spline <- sum((CATTkmeans_spline*weight)[is.nan(CATTkmeans_spline)==0])/sum(weight)
  
  # Diff-in-diff
  ATTdid <- mean(data[data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
    mean(data[data[,3]==T & treatment[data[,2]]==1,1]) - 
    mean(data[data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
    mean(data[data[,3]==T & treatment[data[,2]]==0,1])
  
  # synthetic control
  synthdata <- cbind(matrix(data[treatment[data[,2]]==0,1],T+1),
                     matrix(data[treatment[data[,2]]==1,1],T+1))
  synthdata <- t(synthdata)
  ATTsc <- sc_estimate(Y=synthdata,N0=sum(treatment==0),T0=T)[1]
  
  # synthetic diff-in-diff
  ATTsynthdid <- synthdid_estimate(Y=synthdata,N0=sum(treatment==0),T0=T)[1]
  
  return(c(ATTdid,ATTsc,ATTsynthdid,
           CATTkmeans[map],ATTkmeans,
           CATTkmeans_spline[map_spline],ATTkmeans_spline,
           classification))
}

simulationK5 <- function(T,N,B,sd,rho){
  data <- matrix(0,(T+1)*N,5)
  data[,2] <- kronecker(1:N,rep(1,T+1))
  data[,3] <- 1:(T+1)
  data[,4] <- kronecker(sapply(runif(N,0,1),
                               function(v) 1+(v>0.1)+(v>0.3)+(v>0.5)+(v>0.75)),rep(1,T+1))
  data[,5] <- 1
  
  # serially correlated errors
  rhoMat <- sapply(1:(T+1),
                   function(t) rho^((t-1):(t-T-1)))
  rhoMat <- t(rhoMat)
  rhoMat[rhoMat > 1] <- 0
  epsilon <- rnorm(N*(T+1),0,sd*(1-rho^2)^(0.5))
  epsilon[(0:(N-1))*(T+1)+1] <- rnorm(N,0,sd)
  U <- c(sapply(1:N,
                function(i) rhoMat%*%epsilon[(i-1)*(T+1)+1:(T+1)]))
  
  # unit fixed-effects
  type <- apply(matrix(data[,4],T+1,N),2,mean)
  unitFE <- rnorm(N,37,17)*(type==1) + rnorm(N,39,17)*(type==2) + rnorm(N,35,17)*(type==3) +
    rnorm(N,36,17)*(type==4) + rnorm(N,38,17)*(type==5)
  
  # treatment status
  treatment <- (runif(N,0,1)<1/3)*(type<=3) + (runif(N,0,1)<2/3)*(type>3) 
  
  # generate outcome
  data[,1] <- kronecker(unitFE,rep(1,T+1)) +
    (2.06*(data[,3]-T))*(data[,4]==1) + (1.66*(data[,3]-T))*(data[,4]==2) +
    (1.26*data[,3]-T)*(data[,4]==3) + (0.4*data[,3]-T)*(data[,4]==4) +
    4*kronecker(treatment,rep(1,T+1))*(data[,4]<=3)*(data[,3]==T+1) +
    kronecker(treatment,rep(1,T+1))*(data[,4]>3)*(data[,3]==T+1) +
    U
  
  # Kmeans
  SubData <- data
  SubData[,1] <- data[,1] - c(0,data[1:(nrow(data)-1),1])
  SubData <- SubData[SubData[,3]<=T & SubData[,3]>1,]
  SubData[,3] <- SubData[,3]-1
  result <- matrix(0,N+2,B)
  
  for (t in 1:B){
    init <- ceiling(runif(N,0,2))
    kmeans <- kmeans_fun(init,as.matrix(SubData),2,0,200)
    kmeans_spline <- kmeans_fun_spline(init,as.matrix(SubData),2,0,200)
    result[,t] <- c(init,kmeans$MSE[length(kmeans$MSE)],kmeans_spline$MSE[length(kmeans_spline$MSE)])
  }
  
  init <- result[1:N,which.min(result[N+1,])]
  kmeans <- kmeans_fun(init,as.matrix(SubData),2,0,200)
  init <- result[1:N,which.min(result[N+2,])]
  kmeans_spline <- kmeans_fun_spline(init,as.matrix(SubData),2,0,200)
  
  # Type-specific diff-in-diff
  CATTkmeans <- c(mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                    mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==0,1]),
                  mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                    mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==0,1]) )
  weight <- c(sum(treatment==1 & kmeans$kappa==1), sum(treatment==1 & kmeans$kappa==2))
  weight[is.nan(CATTkmeans)] <- 0
  ATTkmeans <- sum((CATTkmeans*weight)[is.nan(CATTkmeans)==0])/sum(weight)
  
  CATTkmeans_spline <- c(mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                           mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                           mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                           mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==0,1]),
                         mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                           mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                           mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                           mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==0,1]) )
  weight <- c(sum(treatment==1 & kmeans_spline$kappa==1), sum(treatment==1 & kmeans_spline$kappa==2))
  weight[is.nan(CATTkmeans_spline)] <- 0
  ATTkmeans_spline <- sum((CATTkmeans_spline*weight)[is.nan(CATTkmeans_spline)==0])/sum(weight)
  
  # Diff-in-diff
  ATTdid <- mean(data[data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
    mean(data[data[,3]==T & treatment[data[,2]]==1,1]) - 
    mean(data[data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
    mean(data[data[,3]==T & treatment[data[,2]]==0,1])
  
  # synthetic control
  synthdata <- cbind(matrix(data[treatment[data[,2]]==0,1],T+1),
                     matrix(data[treatment[data[,2]]==1,1],T+1))
  synthdata <- t(synthdata)
  ATTsc <- sc_estimate(Y=synthdata,N0=sum(treatment==0),T0=T)[1]
  
  # synthetic diff-in-diff
  ATTsynthdid <- synthdid_estimate(Y=synthdata,N0=sum(treatment==0),T0=T)[1]
  
  return(c(ATTdid,ATTsc,ATTsynthdid,
           CATTkmeans,ATTkmeans,
           CATTkmeans_spline,ATTkmeans_spline,
           0,0))
}

simulationCon <- function(T,N,B,sd,rho){
  data <- matrix(0,(T+1)*N,5)
  data[,2] <- kronecker(1:N,rep(1,T+1))
  data[,3] <- 1:(T+1)
  data[,4] <- kronecker(runif(N,0,1),rep(1,T+1))
  data[,5] <- 1
  
  # serially correlated errors
  rhoMat <- sapply(1:(T+1),
                   function(t) rho^((t-1):(t-T-1)))
  rhoMat <- t(rhoMat)
  rhoMat[rhoMat > 1] <- 0
  epsilon <- rnorm(N*(T+1),0,sd*(1-rho^2)^(0.5))
  epsilon[(0:(N-1))*(T+1)+1] <- rnorm(N,0,sd)
  U <- c(sapply(1:N,
                function(i) rhoMat%*%epsilon[(i-1)*(T+1)+1:(T+1)]))
  
  # unit fixed-effects
  type <- apply(matrix(data[,4],T+1,N),2,mean)
  unitFE <- rnorm(N,0,17) + 37 + 2*type
  
  # treatment status
  treatment <- (runif(N,0,1)<1/3)*(type<=1/2) + (runif(N,0,1)<2/3)*(type>1/2) 
  
  # generate outcome
  data[,1] <- kronecker(unitFE,rep(1,T+1)) +
    (1.66-1.66*data[,4])*(data[,3]-T) +
    4*kronecker(treatment,rep(1,T+1))*(data[,4]<=1/2)*(data[,3]==T+1) +
    kronecker(treatment,rep(1,T+1))*(data[,4]>1/2)*(data[,3]==T+1) +
    U
  
  # Kmeans
  SubData <- data
  SubData[,1] <- data[,1] - c(0,data[1:(nrow(data)-1),1])
  SubData <- SubData[SubData[,3]<=T & SubData[,3]>1,]
  SubData[,3] <- SubData[,3]-1
  result <- matrix(0,N+2,B)
  
  for (t in 1:B){
    init <- ceiling(runif(N,0,2))
    kmeans <- kmeans_fun(init,as.matrix(SubData),2,0,200)
    kmeans_spline <- kmeans_fun_spline(init,as.matrix(SubData),2,0,200)
    result[,t] <- c(init,kmeans$MSE[length(kmeans$MSE)],kmeans_spline$MSE[length(kmeans_spline$MSE)])
  }
  
  init <- result[1:N,which.min(result[N+1,])]
  kmeans <- kmeans_fun(init,as.matrix(SubData),2,0,200)
  init <- result[1:N,which.min(result[N+2,])]
  kmeans_spline <- kmeans_fun_spline(init,as.matrix(SubData),2,0,200)
  
  # Type-specific diff-in-diff
  CATTkmeans <- c(mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                    mean(data[kmeans$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==0,1]),
                  mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                    mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                    mean(data[kmeans$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==0,1]) )
  weight <- c(sum(treatment==1 & kmeans$kappa==1), sum(treatment==1 & kmeans$kappa==2))
  weight[is.nan(CATTkmeans)] <- 0
  ATTkmeans <- sum((CATTkmeans*weight)[is.nan(CATTkmeans)==0])/sum(weight)
  
  CATTkmeans_spline <- c(mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                           mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                           mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                           mean(data[kmeans_spline$kappa[data[,2]]==1 & data[,3]==T & treatment[data[,2]]==0,1]),
                         mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
                           mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==1,1]) - 
                           mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
                           mean(data[kmeans_spline$kappa[data[,2]]==2 & data[,3]==T & treatment[data[,2]]==0,1]) )
  weight <- c(sum(treatment==1 & kmeans_spline$kappa==1), sum(treatment==1 & kmeans_spline$kappa==2))
  weight[is.nan(CATTkmeans_spline)] <- 0
  ATTkmeans_spline <- sum((CATTkmeans_spline*weight)[is.nan(CATTkmeans_spline)==0])/sum(weight)
  
  # Diff-in-diff
  ATTdid <- mean(data[data[,3]==T+1 & treatment[data[,2]]==1,1]) - 
    mean(data[data[,3]==T & treatment[data[,2]]==1,1]) - 
    mean(data[data[,3]==T+1 & treatment[data[,2]]==0,1]) + 
    mean(data[data[,3]==T & treatment[data[,2]]==0,1])
  
  # synthetic control
  synthdata <- cbind(matrix(data[treatment[data[,2]]==0,1],T+1),
                     matrix(data[treatment[data[,2]]==1,1],T+1))
  synthdata <- t(synthdata)
  ATTsc <- sc_estimate(Y=synthdata,N0=sum(treatment==0),T0=T)[1]
  
  # synthetic diff-in-diff
  ATTsynthdid <- synthdid_estimate(Y=synthdata,N0=sum(treatment==0),T0=T)[1]
  
  return(c(ATTdid,ATTsc,ATTsynthdid,
           CATTkmeans,ATTkmeans,
           CATTkmeans_spline,ATTkmeans_spline,
           0,0))
}

print_sim <- function(mat){
  
  if (nrow(mat)==11){
    ans <- list(ATT=round(1000*rbind(apply(mat[c(1:3,6,9),]-2,1,mean),
                                     apply((mat[c(1:3,6,9),]-2)^2,1,mean)) )/1000,
                CATT=round(1000*rbind(apply(mat[c(4:5,7:8),]-matrix(c(4,1),4,ncol(mat)),1,mean),
                                      apply((mat[c(4:5,7:8),]-matrix(c(4,1),4,ncol(mat)))^2,1,mean)) )/1000,
                Classification=round(1000*c(mean(mat[10,mat[10,]<1]),mean(mat[10,]>=0.95),mean(mat[10,]==1),mean(mat[11,mat[11,]<1]),mean(mat[11,]>=0.95),mean(mat[11,]==1)))/1000
    )
    rownames(ans$ATT) <- c("bias", "MSE")
    colnames(ans$ATT) <- c("DiD", "SC", "SDiD", "TDiD", "TDiDs")
    rownames(ans$CATT) <- c("bias", "MSE")
    colnames(ans$CATT) <- c("type1", "type2", "type1s", "type2")
  } else if (nrow(mat)==13){
    ans <- list(ATT=round(1000*rbind(apply(mat[c(1:3,7,11),]-2,1,mean),
                                     apply((mat[c(1:3,7,11),]-2)^2,1,mean)) )/1000,
                CATT=round(1000*rbind(apply(mat[c(4:6,8:10),]-matrix(c(5,1,0),6,ncol(mat)),1,mean),
                                      apply((mat[c(4:6,8:10),]-matrix(c(5,1,0),6,ncol(mat)))^2,1,mean)) )/1000,
                Classification=round(1000*c(mean(mat[12,mat[12,]<1]),mean(mat[12,]>=0.95),mean(mat[12,]==1),mean(mat[13,mat[13,]<1]),mean(mat[13,]>=0.95),mean(mat[13,]==1)))/1000
    )
    rownames(ans$ATT) <- c("bias", "MSE")
    colnames(ans$ATT) <- c("DiD", "SC", "SDiD", "TDiD", "TDiDs")
    rownames(ans$CATT) <- c("bias", "MSE")
    colnames(ans$CATT) <- c("type1", "type2", "type3", "type1s", "type2", "type3")
  }
  names(ans$Classification) <- c("mean_success", "lessthan5", "perfect", "mean_success", "lessthan2", "perfect")
  
  print(ans)
}
